1 Introduction

Single-cell analysis of human dataset.

2 Load dataset

library(dplyr)
library(Seurat)
library(ggplot2)
library(patchwork)
random_seed <- 12345

data <- readRDS('../data/raw/MARS-Fung.RDS')
raw_ann <- CreateSeuratObject(data$counts, meta.data = data$metadata)

2.1 Quality plots

raw_ann[['percent.mito']] <- PercentageFeatureSet(raw_ann, pattern = "^MT-")
raw_ann[['percent.ercc']] <- PercentageFeatureSet(raw_ann, pattern = "^ERCC-")
raw_ann[['percent.ribo']] <- PercentageFeatureSet(raw_ann, pattern = "^RP[LS]")
VlnPlot(raw_ann, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"),
        ncol = 4)

FeatureScatter(raw_ann, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage")

2.2 Filtering

print(paste0("Before filtering: ", dim(raw_ann)[2], " cells ",  dim(raw_ann)[1], " genes"))
## [1] "Before filtering: 1920 cells 57874 genes"
# Remove ERCC
raw_ann <- raw_ann[rownames(raw_ann)[!grepl('ERCC-', rownames(raw_ann))], ]
# Remove Zero stage
raw_ann <- raw_ann[, raw_ann@meta.data$Stage != 'Zero']
nbins <- 100
min_cells <- 2000
max_cells <- 35e3
min_genes <- 550
max_genes <- 4950

ggplot(raw_ann@meta.data, aes(x=nCount_RNA)) + 
  geom_histogram(bins = nbins) + 
  geom_vline(aes(xintercept=min_cells), linetype="dashed", color='red') +
  geom_vline(aes(xintercept=max_cells), linetype="dashed", color='red')

ggplot(raw_ann@meta.data, aes(x=nFeature_RNA)) + 
  geom_histogram(bins = nbins) +
  geom_vline(aes(xintercept=min_genes), linetype="dashed", color='red') +
  geom_vline(aes(xintercept=max_genes), linetype="dashed", color='red')

ggplot(raw_ann@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mito)) + 
  geom_point() + 
  scale_color_continuous(type = "viridis") +
  geom_vline(aes(xintercept=min_cells), linetype="dashed", color='red') +
  geom_vline(aes(xintercept=max_cells), linetype="dashed", color='red') +
  geom_hline(aes(yintercept=min_genes), linetype="dashed", color='red') +
  geom_hline(aes(yintercept=max_genes), linetype="dashed", color='red')

adata <- subset(raw_ann, subset = 
                    nFeature_RNA > min_genes & nFeature_RNA < max_genes & 
                    nCount_RNA > min_cells & nCount_RNA < max_cells & 
                    percent.mito < 20)
adata <- CreateSeuratObject(adata@assays$RNA@counts, min.cells = 3, meta.data = adata@meta.data)

2.3 After filtering

print(paste0("After filtering: ", dim(adata)[2], " cells ",  dim(adata)[1], " genes"))
## [1] "After filtering: 1702 cells 23050 genes"
VlnPlot(adata, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"),
        ncol = 4)

2.4 Save dataset

saveRDS(adata, file = "../data/processed/01_fung.filtered.RDS")

3 Subset

adata <- adata[, adata$Condition %in% c("VFG53_1025", "VFG53_-BMP_1101", "VFG53_-BMP+FGF_1109")]
dim(adata)
## [1] 23050   562

4 Normalization

adata <- NormalizeData(adata)

5 HVG

adata <- FindVariableFeatures(adata, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(adata), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(adata)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2

6 Scale data

adata <- ScaleData(adata)

7 Cell Cycle

adata <- CellCycleScoring(adata, 
                          s.features = cc.genes.updated.2019$s.genes, 
                          g2m.features = cc.genes.updated.2019$g2m.genes, 
                          set.ident = TRUE)

8 PCA

adata <- RunPCA(adata, npcs = 50)
ElbowPlot(adata)

DimPlot(adata, reduction = 'pca', group.by = 'Stage')

DimPlot(adata, reduction = 'pca', group.by = 'Phase')

Idents(adata) <- 'Stage'
commonR::plot_proportion_bar(adata, group.by = "Phase") + ggtitle("Cell cycle proportion per Stage")

9 Clustering & UMAP (resolution 0.5)

adata <- FindNeighbors(adata, dims = 1:20)
adata <- FindClusters(adata, random.seed = random_seed, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 562
## Number of edges: 29045
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6803
## Number of communities: 2
## Elapsed time: 0 seconds
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed)
DimPlot(adata, reduction = "umap")

DimPlot(adata, reduction = "umap", group.by = 'Stage')

DimPlot(adata, reduction = "umap", group.by = 'Phase')

markers <- FindAllMarkers(adata, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5)
markers %>%
    group_by(cluster) %>%
    slice_max(n = 10, order_by = avg_log2FC)

10 Clustering & UMAP (resolution 0.6)

adata <- FindNeighbors(adata, dims = 1:20)
adata <- FindClusters(adata, random.seed = random_seed, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 562
## Number of edges: 29045
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6357
## Number of communities: 3
## Elapsed time: 0 seconds
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed)
DimPlot(adata, reduction = "umap")

DimPlot(adata, reduction = "umap", group.by = 'Stage')

DimPlot(adata, reduction = "umap", group.by = 'Phase')

markers <- FindAllMarkers(adata, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5)
markers %>%
    group_by(cluster) %>%
    slice_max(n = 10, order_by = avg_log2FC)

11 Save analysis

saveRDS(adata, file = '../data/processed/01_fung.RDS')

12 Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] patchwork_1.1.1    ggplot2_3.3.5      SeuratObject_4.0.4 Seurat_4.1.0      
## [5] dplyr_1.0.7       
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15            colorspace_2.0-2      ggsignif_0.6.3       
##   [4] deldir_1.0-6          ellipsis_0.3.2        ggridges_0.5.3       
##   [7] rstudioapi_0.13       spatstat.data_2.1-2   leiden_0.3.9         
##  [10] listenv_0.8.0         farver_2.1.0          ggrepel_0.9.1        
##  [13] RSpectra_0.16-0       fansi_1.0.2           codetools_0.2-18     
##  [16] splines_4.1.2         knitr_1.37            polyclip_1.10-0      
##  [19] jsonlite_1.7.2        ica_1.0-2             cluster_2.1.2        
##  [22] png_0.1-7             uwot_0.1.11           shiny_1.7.1          
##  [25] sctransform_0.3.3     spatstat.sparse_2.1-0 compiler_4.1.2       
##  [28] httr_1.4.2            Matrix_1.4-0          fastmap_1.1.0        
##  [31] lazyeval_0.2.2        cli_3.1.0             limma_3.50.0         
##  [34] later_1.3.0           htmltools_0.5.2       tools_4.1.2          
##  [37] igraph_1.2.11         gtable_0.3.0          glue_1.6.0           
##  [40] RANN_2.6.1            reshape2_1.4.4        tinytex_0.36         
##  [43] Rcpp_1.0.8            scattermore_0.7       jquerylib_0.1.4      
##  [46] vctrs_0.3.8           nlme_3.1-155          lmtest_0.9-39        
##  [49] xfun_0.29             stringr_1.4.0         globals_0.14.0       
##  [52] mime_0.12             miniUI_0.1.1.1        lifecycle_1.0.1      
##  [55] irlba_2.3.5           commonR_0.1.0         goftest_1.2-3        
##  [58] future_1.23.0         MASS_7.3-55           zoo_1.8-9            
##  [61] scales_1.1.1          spatstat.core_2.3-2   promises_1.2.0.1     
##  [64] spatstat.utils_2.3-0  parallel_4.1.2        RColorBrewer_1.1-2   
##  [67] yaml_2.2.1            reticulate_1.23       pbapply_1.5-0        
##  [70] gridExtra_2.3         sass_0.4.0            rpart_4.1-15         
##  [73] stringi_1.7.6         highr_0.9             rlang_0.4.12         
##  [76] pkgconfig_2.0.3       matrixStats_0.61.0    evaluate_0.14        
##  [79] lattice_0.20-45       ROCR_1.0-11           purrr_0.3.4          
##  [82] tensor_1.5            htmlwidgets_1.5.4     labeling_0.4.2       
##  [85] cowplot_1.1.1         tidyselect_1.1.1      parallelly_1.30.0    
##  [88] RcppAnnoy_0.0.19      plyr_1.8.6            magrittr_2.0.1       
##  [91] R6_2.5.1              generics_0.1.1        DBI_1.1.2            
##  [94] pillar_1.6.4          withr_2.4.3           mgcv_1.8-38          
##  [97] fitdistrplus_1.1-6    survival_3.2-13       abind_1.4-5          
## [100] tibble_3.1.6          future.apply_1.8.1    crayon_1.4.2         
## [103] KernSmooth_2.23-20    utf8_1.2.2            spatstat.geom_2.3-1  
## [106] plotly_4.10.0         rmarkdown_2.11        grid_4.1.2           
## [109] data.table_1.14.2     digest_0.6.29         xtable_1.8-4         
## [112] tidyr_1.1.4           httpuv_1.6.5          munsell_0.5.0        
## [115] viridisLite_0.4.0     bslib_0.3.1